Load required libraries

library("limma")
library("gplots")
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
library("ConsensusClusterPlus")

# Get `magrittr` pipe
`%>%` <- dplyr::`%>%`

Import files and tables

## Tab-delimited PSI report to load (absolute, machine-specific path)
file_psi <- "/Users/naqvia/Desktop/AS-DMG/analyses/pan_cancer/results/pan_cancer_splicing.thr10.report_select.remDup.txt"

## Read the report: the first line is the header and the first column
## supplies the row names of the resulting data frame
psi_tab <- read.table(
  file_psi,
  header = TRUE,
  sep = "\t",
  row.names = 1
)

Consensus clustering

## Build the numeric matrix (`mat_hm`) that the clustering steps below operate on.
## NOTE(review): `rnames` is not referenced again in the visible part of this
## file -- confirm it is used elsewhere before removing it.
rnames <- psi_tab[,1]
## NOTE(review): read.table() above was called with row.names = 1, so the first
## column of the file is already consumed as row names. If that column was
## Splice_ID, `psi_tab$Splice_ID` is presumably NULL here and this assignment
## would reset the row names instead of setting them -- verify against the file.
row.names(psi_tab) <- psi_tab$Splice_ID
## Convert every column except the first into a numeric matrix.
## NOTE(review): if the ID column was consumed by row.names = 1, column 1 is a
## data column and is being dropped here -- TODO confirm this is intended.
mat_hm <- data.matrix(psi_tab[,2:ncol(psi_tab)])

d <- mat_hm
# d[1:5,1:5]

## Reduce the dataset to the most variable rows (splice events), measured by
## median absolute deviation (MAD). 10835 rows are kept, i.e. the top 10% of
## the 108352 rows in the original report (0.10 * 108352).
##
## MADs are computed with na.rm = TRUE so that rows containing missing values
## are ranked on their observed values (the median centering step also uses
## na.rm = TRUE). Without it such rows get an NA MAD, and rev(order(mads))
## places NA entries FIRST, silently selecting them as "most variable".
mads <- apply(d, 1, mad, na.rm = TRUE)

## order(..., na.last = FALSE) sorts ascending with any remaining NA MADs
## (rows that are entirely NA) in front; rev() then yields descending order
## with those rows last. head() caps the selection at the number of available
## rows, and drop = FALSE keeps `d` a matrix even if a single row is selected.
d <- d[head(rev(order(mads, na.last = FALSE)), 10835), , drop = FALSE]

## Median-center each row of d: the row median (ignoring NAs) is subtracted
## from every value in that row. As noted by the original author, the default
## agglomerative hierarchical clustering setting uses Pearson correlation
## distance, for which row-wise median centering is appropriate.
d <- sweep(
  d,
  MARGIN = 1,
  STATS = apply(d, 1, median, na.rm = TRUE)
)


## Replace every non-finite entry (NA, NaN, Inf, -Inf) with 0.
## Values are not removed: after the median centering above, 0 corresponds to
## the row median. A single vectorised is.finite() mask replaces the former
## element-by-element sapply() pass and the separate NA / NaN assignments,
## producing the same matrix.
d[!is.finite(d)] <- 0

hc/spearman

## Consensus clustering of d: hierarchical clustering ("hc") with Spearman
## distance, 100 resampling repetitions at pItem = 0.8, up to maxK = 10 clusters
results <- ConsensusClusterPlus(
  d,
  maxK = 10,
  reps = 100,
  pItem = 0.8,
  title = "clustering",
  clusterAlg = "hc",
  distance = "spearman",
  seed = 123,
  innerLinkage = "average",
  finalLinkage = "average"
)
## end fraction
## clustered

## clustered

## clustered

## clustered

## clustered

## clustered

## clustered

## clustered

## clustered

pam/spearman

## Consensus clustering of d: partitioning around medoids ("pam") with Spearman
## distance, 100 resampling repetitions at pItem = 0.8, up to maxK = 10 clusters
results <- ConsensusClusterPlus(
  d,
  maxK = 10,
  reps = 100,
  pItem = 0.8,
  title = "clustering",
  clusterAlg = "pam",
  distance = "spearman",
  seed = 123,
  innerLinkage = "average",
  finalLinkage = "average"
)
## end fraction
## clustered

## clustered

## clustered

## clustered

## clustered

## clustered

## clustered

## clustered

## clustered

## k-means/spearman

## Consensus clustering of d: k-means ("km") requested with Spearman distance,
## 100 resampling repetitions at pItem = 0.8, up to maxK = 10 clusters.
## The package reports that "km" only supports euclidean distance on a data
## matrix and changes the distance to euclidean, so "spearman" is not applied.
results <- ConsensusClusterPlus(
  d,
  maxK = 10,
  reps = 100,
  pItem = 0.8,
  title = "clustering",
  clusterAlg = "km",
  distance = "spearman",
  seed = 123,
  innerLinkage = "average",
  finalLinkage = "average"
)
## Note: The km (kmeans) option only supports a euclidean distance metric when supplying a data matrix.  If you want to cluster a distance matrix, use a different algorithm such as 'hc' or 'pam'.  Changing distance to euclidean
## end fraction
## clustered

## clustered

## clustered

## clustered

## clustered

## clustered

## clustered

## clustered

## clustered

hc/pearson

## Consensus clustering of d: hierarchical clustering ("hc") with Pearson
## distance, 100 resampling repetitions at pItem = 0.8, up to maxK = 10 clusters
results <- ConsensusClusterPlus(
  d,
  maxK = 10,
  reps = 100,
  pItem = 0.8,
  title = "clustering",
  clusterAlg = "hc",
  distance = "pearson",
  seed = 123,
  innerLinkage = "average",
  finalLinkage = "average"
)
## end fraction
## clustered

## clustered

## clustered

## clustered

## clustered

## clustered

## clustered

## clustered

## clustered

## pam/pearson

## Consensus clustering of d: partitioning around medoids ("pam") with Pearson
## distance, 100 resampling repetitions at pItem = 0.8, up to maxK = 10 clusters
results <- ConsensusClusterPlus(
  d,
  maxK = 10,
  reps = 100,
  pItem = 0.8,
  title = "clustering",
  clusterAlg = "pam",
  distance = "pearson",
  seed = 123,
  innerLinkage = "average",
  finalLinkage = "average"
)
## end fraction
## clustered

## clustered

## clustered

## clustered

## clustered

## clustered

## clustered

## clustered

## clustered

## k-means/pearson

## Consensus clustering of d: k-means ("km") requested with Pearson distance,
## 100 resampling repetitions at pItem = 0.8, up to maxK = 10 clusters.
## The package reports that "km" only supports euclidean distance on a data
## matrix and changes the distance to euclidean, so "pearson" is not applied.
results <- ConsensusClusterPlus(
  d,
  maxK = 10,
  reps = 100,
  pItem = 0.8,
  title = "clustering",
  clusterAlg = "km",
  distance = "pearson",
  seed = 123,
  innerLinkage = "average",
  finalLinkage = "average"
)
## Note: The km (kmeans) option only supports a euclidean distance metric when supplying a data matrix.  If you want to cluster a distance matrix, use a different algorithm such as 'hc' or 'pam'.  Changing distance to euclidean
## end fraction
## clustered

## clustered

## clustered

## clustered

## clustered

## clustered

## clustered

## clustered

## clustered

hc/euclidean

## Consensus clustering of d: hierarchical clustering ("hc") with Euclidean
## distance, 100 resampling repetitions at pItem = 0.8, up to maxK = 10 clusters
results <- ConsensusClusterPlus(
  d,
  maxK = 10,
  reps = 100,
  pItem = 0.8,
  title = "clustering",
  clusterAlg = "hc",
  distance = "euclidean",
  seed = 123,
  innerLinkage = "average",
  finalLinkage = "average"
)
## end fraction
## clustered

## clustered

## clustered

## clustered

## clustered

## clustered

## clustered

## clustered

## clustered

## pam/euclidean

## Consensus clustering of d: partitioning around medoids ("pam") with Euclidean
## distance, 100 resampling repetitions at pItem = 0.8, up to maxK = 10 clusters
results <- ConsensusClusterPlus(
  d,
  maxK = 10,
  reps = 100,
  pItem = 0.8,
  title = "clustering",
  clusterAlg = "pam",
  distance = "euclidean",
  seed = 123,
  innerLinkage = "average",
  finalLinkage = "average"
)
## end fraction
## clustered

## clustered

## clustered

## clustered

## clustered

## clustered

## clustered

## clustered

## clustered

## k-means/euclidean

## Consensus clustering of d: k-means ("km") with Euclidean distance,
## 100 resampling repetitions at pItem = 0.8, up to maxK = 10 clusters
results <- ConsensusClusterPlus(
  d,
  maxK = 10,
  reps = 100,
  pItem = 0.8,
  title = "clustering",
  clusterAlg = "km",
  distance = "euclidean",
  seed = 123,
  innerLinkage = "average",
  finalLinkage = "average"
)
## end fraction
## clustered

## clustered

## clustered

## clustered

## clustered

## clustered

## clustered

## clustered

## clustered